# Show the current working directory; the relative path "SPRUCE.csv" below
# is resolved against it.
getwd()
## [1] "C:/Users/hoaho/OneDrive/Desktop/Classworks/Math 4753 Statistic/Lab3"
# Read the spruce data set into a data frame.
spruce <- read.csv("SPRUCE.csv")
# Preview the first six rows.
head(spruce, n = 6)
## BHDiameter Height
## 1 18.9 20.0
## 2 16.6 18.8
## 3 15.5 16.8
## 4 15.5 16.9
## 5 19.4 20.2
## 6 13.7 16.3
# Scatter plot of Height against BHDiameter. Both axes start at 0 and run
# 10% past the largest observed value.
with(spruce, {
  x_range <- c(0, 1.1 * max(BHDiameter))
  y_range <- c(0, 1.1 * max(Height))
  plot(
    BHDiameter, Height,
    main = "SPRUCE", xlab = "BHDiameter", ylab = "Height",
    pch = 21, bg = "Blue", cex = 1.2,
    xlim = x_range, ylim = y_range
  )
})
No, it appears somewhat curved.
library(s20x)
# Split the plotting device into a 2 x 2 grid of panels, filled row by row.
# Only three panels are used below; the fourth is left empty.
layout(matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE))
# Draw one trendscatter plot of Height against BHDiameter per value of f,
# titling each panel with the value used.
for (f_value in c(0.5, 0.6, 0.7)) {
  with(spruce, trendscatter(Height ~ BHDiameter,
    f = f_value, main = paste0("f=", f_value)
  ))
}
# Fit a straight-line model and a loess smooth of Height on BHDiameter.
spruce.lm <- with(spruce, lm(Height ~ BHDiameter))
lo <- with(spruce, loess(Height ~ BHDiameter))
# Redraw the scatter plot with both axes starting at 0 and extending 10%
# past the largest observed value.
with(spruce, plot(BHDiameter, Height,
  main = "SPRUCE", xlab = "BHDiameter", ylab = "Height",
  pch = 21, bg = "Blue", cex = 1.2,
  xlim = c(0, 1.1 * max(BHDiameter)), ylim = c(0, 1.1 * max(Height))
))
# Overlay the fitted straight line in red.
abline(spruce.lm, col = "red")
# Evaluate the loess fit on 1001 evenly spaced diameters spanning the
# observed range (1000 equal steps) and overlay it as a thick black curve.
xl <- with(spruce, seq(min(BHDiameter), max(BHDiameter), length.out = 1001))
lines(xl, predict(lo, xl), col = "black", lwd = 2)
The straight line looks a bit off. The smooth curve fits the data better.
# Split the plotting device into a 2 x 2 grid of panels, filled row by row.
layout(matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE))
##Graph 1
# Scatter plot of Height against BHDiameter with the fitted straight line
# from spruce.lm drawn in red.
with(spruce, plot(BHDiameter, Height,
  main = "Fit line", xlab = "BHDiameter", ylab = "Height",
  pch = 21, bg = "Blue", cex = 1.2,
  xlim = c(0, 1.1 * max(BHDiameter)), ylim = c(0, 1.1 * max(Height))
))
abline(spruce.lm, col = "red")
##Graph 2
# Scatter plot with the residual deviations drawn as vertical segments.
with(spruce, plot(BHDiameter, Height,
  main = "With RSS", xlab = "BHDiameter", ylab = "Height",
  pch = 21, bg = "Blue", cex = 1.2,
  xlim = c(0, 1.1 * max(BHDiameter)), ylim = c(0, 1.1 * max(Height))
))
# Heights predicted by the straight-line model at each observed diameter.
yhat <- predict(spruce.lm, newdata = spruce)
# Join every observed point to its fitted value on the line.
with(spruce, segments(BHDiameter, Height, BHDiameter, yhat))
abline(spruce.lm, col = "red")
# Residual sum of squares: squared distances from the observations to the
# fitted line.
RSS <- with(spruce, sum((Height - yhat)^2))
RSS
## [1] 95.70281
##Graph 3
# Scatter plot with the mean height (horizontal line), the fitted line
# (red), and the deviations of the fitted values from the mean (red
# vertical segments).
with(spruce, plot(BHDiameter, Height,
  main = "Mean of Height vs BHDiameter",
  xlab = "BHDiameter", ylab = "Height",
  pch = 21, bg = "Blue", cex = 1.2,
  xlim = c(0, 1.1 * max(BHDiameter)), ylim = c(0, 1.1 * max(Height))
))
with(spruce, abline(h = mean(Height)))
abline(spruce.lm, col = "red")
with(spruce, segments(BHDiameter, mean(Height), BHDiameter, yhat, col = "red"))
# Model sum of squares: squared distances from the fitted values to the
# mean height.
MSS <- with(spruce, sum((yhat - mean(Height))^2))
MSS
## [1] 183.2447
##Graph 4
# Scatter plot with the mean height (horizontal line) and the total
# deviation of each observation from that mean (green vertical segments).
with(spruce, plot(BHDiameter, Height,
  main = "With total deviation line segments",
  xlab = "BHDiameter", ylab = "Height",
  pch = 21, bg = "Blue", cex = 1.2,
  xlim = c(0, 1.1 * max(BHDiameter)), ylim = c(0, 1.1 * max(Height))
))
with(spruce, abline(h = mean(Height)))
with(spruce, segments(BHDiameter, Height, BHDiameter, mean(Height), col = "green"))
# Total sum of squares: squared distances from the observations to the
# mean height.
TSS <- with(spruce, sum((Height - mean(Height))^2))
TSS
## [1] 278.9475
# Show the three sums of squares together, then the ratio MSS / TSS.
MSS
## [1] 183.2447
RSS
## [1] 95.70281
MSS / TSS
## [1] 0.6569146
This number is the coefficient of determination (R-squared). It is about 0.66, meaning that about 66% of the variability in Height is explained by the linear model on BHDiameter, leaving roughly 34% unexplained. Therefore, this linear model is not very good.
# Add the model and residual sums of squares for comparison with TSS.
RSS + MSS
## [1] 278.9475
Yes, MSS + RSS equals TSS (both are 278.9475).
# Print the full summary of the straight-line fit: residual quantiles,
# coefficient table, residual standard error, R-squared and F-statistic.
print(summary(spruce.lm))
##
## Call:
## lm(formula = Height ~ BHDiameter)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9394 -0.9763 0.2829 0.9950 2.6644
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.14684 1.12131 8.157 1.63e-09 ***
## BHDiameter 0.48147 0.05967 8.069 2.09e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.678 on 34 degrees of freedom
## Multiple R-squared: 0.6569, Adjusted R-squared: 0.6468
## F-statistic: 65.1 on 1 and 34 DF, p-value: 2.089e-09
# Extract the estimated intercept and slope of the fitted line.
coefficients(spruce.lm)
## (Intercept) BHDiameter
## 9.1468390 0.4814743
Slope: 0.4814743
Intercept: 9.1468390
Fitted line: Height = 9.1468390 + 0.4814743 * BHDiameter
# Predict Height from the fitted line at diameters of 15, 18 and 20.
predict(
  spruce.lm,
  newdata = data.frame(BHDiameter = c(15, 18, 20))
)
## 1 2 3
## 16.36895 17.81338 18.77632
library(ggplot2)
# Build the ggplot version of the scatter plot: points and a connecting
# line coloured by BHDiameter, plus a straight-line (method = "lm") smooth.
g <- ggplot(spruce, aes(x = BHDiameter, y = Height, colour = BHDiameter)) +
  geom_point() +
  geom_line() +
  geom_smooth(method = "lm")
# Display the plot with a title; g itself is left without the title.
g + ggtitle("Height Vs BHDiameter")